Baltimore Ceasefire 365 is a city-wide call asking Baltimore residents to avoid having any murders through quarterly Ceasefires and Peace Challenges (February, May, August, and November). In this post we use open data to model the impact of the ceasefire on shootings.

Shootings in Baltimore

Baltimore releases detailed data on issues relevant to the city, including crime. This allows us to get a good idea of the distribution of shootings in Baltimore.

Shootings per day seem to have increased over time, but the time series is complicated. Here’s how we got the data and created the above figure:

library(tidyverse)
library(scales)

# data came from here: https://data.baltimorecity.gov/Public-Safety/BPD-Part-1-Victim-Based-Crime-Data/wsfq-mvij/data
# but I'm loading a copy that I backed up on github
bpd <- read_csv("https://raw.githubusercontent.com/peterphalen/ceasefire/master/BPD_Part_1_Victim_Based_Crime_Data.csv")

# subset to shootings or homicides with a firearm
bpd <- subset(bpd, Description == "SHOOTING" |
                (Description == "HOMICIDE" & Weapon == "FIREARM"))

bpd$CrimeDate <- as.Date(bpd$CrimeDate, format = "%m/%d/%Y")

# there are many crimes per day. collapse to daily counts
daily <- bpd %>% group_by(CrimeDate) %>% summarise(shootings = n())

# fill missing dates, because some had no shootings
full.ts <- data.frame(CrimeDate = seq(daily$CrimeDate[1], 
                                      daily$CrimeDate[nrow(daily)], by="day"))
daily <- full_join(full.ts,daily)
daily <- daily %>% group_by(CrimeDate) %>% mutate_all(funs(ifelse(is.na(.),0,.)))

ggplot(daily) +
  aes(x=CrimeDate, y=shootings) +
  geom_point(alpha=.2) + 
  xlab("date") +
  ylab("shootings") +
  scale_y_continuous(breaks=c(0,4,8,12)) +
  scale_x_date(labels = date_format("%b %Y")) +
  ggtitle(" ", 
          subtitle="Baltimore (2012-present)")

These shootings disproportionately affect black communities. You can tap areas of the map to see which neighborhoods are most impacted.

Here is code to produce the above map—as well as a population-adjusted version (not run):

library(geojsonio)
library(leaflet)

bpd$Neighborhood <- as.character(bpd$Neighborhood)
bpd <- subset(bpd, !is.na(Neighborhood))

count <- bpd %>%
  group_by(Neighborhood) %>%
  summarise(total.count=n()) 

# get polygon data to draw neighborhoods.
# these shapes downloaded from Baltimore Open Data at https://gis-baltimore.opendata.arcgis.com/datasets/1ca93e68f11541d4b59a63243725c4b7_0.geojson
# but I'm pulling from a github backup for stability
nbds <- geojsonio::geojson_read("https://raw.githubusercontent.com/peterphalen/ceasefire/master/Neighborhoods.geojson", what = "sp")

get_shooting_count <- function(neighborhood){
  nbd <- as.character(neighborhood)
  if(nbd %in% count$Neighborhood){
    count <- count[count$Neighborhood == nbd,]$total.count
    return(count)
  }
  if(!(nbd %in% count$Neighborhood)){
    return(0)
  }
}

nbds$count <- sapply(nbds$Name, get_shooting_count)

# draw legend
range.count <- range(nbds$count,na.rm=T)
labs <- c(0,50,100,150,200)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), labs)

leaflet(nbds) %>% 
  addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
  addPolygons(stroke=T,
              weight=1,
              popup=paste0(nbds$Name,"<br/>Shootings: ",nbds$count),
              color=~pal.crime(count),
              fillOpacity=.5) %>%
  addLegend("bottomright",title="# of Shootings (2012-present)",colors=~pal.crime(labs),labels=~labs)

#--------- population-adjusted --------------#
nbds$per1k <- nbds$count / nbds$Population * 1000
nbds$per1k <- round(nbds$per1k)
nbds$per1k <- ifelse(nbds$Population == 0, NA, nbds$per1k)
labs <- c(0,20,40,60)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), 
                          labs,
                          na.color = "#b2b2b2")

countlabel <- paste0(nbds$Name,"<br/>",nbds$count," shootings among ",nbds$Population," residents")
nbds$countlabel <- ifelse(nbds$Population == 0, paste0(nbds$Name,":<br/>","No residents"), countlabel)

leaflet(nbds) %>% #draw population-adjusted map,
                  #areas with 0 residents are greyed 
                  #out but can still be clicked
  addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
  addPolygons(stroke=T,
              weight=1,
              popup=nbds$countlabel,
              color=~pal.crime(per1k),
              fillOpacity=.6) %>%
  addLegend("bottomright",title="Shootings per one</br>thousand residents</br>(2012-present)",colors=~pal.crime(labs),labels=~labs)

Ceasefire weekends

Ceasefires have been called four times per year since August 2017. These are “ceasefire weekends” but their impact often extends well beyond a few days (one lasted twelve).

Here are the first days of each ceasefire (all Fridays).

# first day (Friday) of ceasefire weekends
ceasefire.initial <- 
  as.Date(
  c("08/04/2017",
    "11/03/2017",
    "02/02/2018",
    "05/11/2018",
    "08/03/2018",
    "11/02/2018",
    "02/01/2019",
    "05/10/2019",
    "08/02/2019"),
      format="%m/%d/%Y")

We determine the weekends of each ceasefire so we can see whether these weekends had fewer shootings, after accounting for other trends and seasonal effects.

ceasefire.weekends <- lapply(ceasefire.initial, 
                               function(x){
                                 seq(from=x, 
                                   by="day", 
                                   length.out=3)})
ceasefire.weekends <- do.call("c", ceasefire.weekends)

Initial model

First we’re going to use the prophet package to check the rough effect of the ceasefire (coded as a “holiday”) while accounting for time trends and yearly and weekly seasonality. The prophet package was designed to be a good first pass: it gives you decent forecasts without a lot of manual effort.

We feed the ceasefire weekend dates into the model as special days or “holidays.”

ceasefires <- data_frame(
  holiday = 'ceasefire',
  ds = ceasefire.initial, # Fridays
  lower_window = 0,
  upper_window = 2 # for Sat and Sun
)

Then fit the model, accounting for general time trends as well as yearly and weekly seasonality.

library(prophet)

ts <- data.frame(ds=daily$CrimeDate,
                 y=daily$shootings)
m1 <- prophet(ts, 
              yearly.seasonality = T,
              weekly.seasonality = T, 
              mcmc.samples = 500,
              holidays = ceasefires,
              cores=4)

We plot the model against the data:

There are more shootings in the past few years, but clear seasonality. And check out the multiple downward blue spikes at the bottom-right of the plot…

Here is the decomposition of the above time series:

The “holidays” are ceasefires (those downward spikes). Ceasefires appear to be associated with fewer shootings, even after accounting for weekly seasonality, yearly seasonality, and overall time trends. The prophet code is really simple, which is nice. Here’s how we made the above plots:

future <- make_future_dataframe(m1, periods = 30)
forecast <- predict(m1, future)
plot(m1, forecast)
prophet_plot_components(m1, forecast)

Customized model

The prophet package is user-friendly but it doesn’t let us drill down and check the various effects for stuff like statistical significance. It also doesn’t allow us to use a poisson link function, which we want because the outcome is a count and has almost identical mean and standard deviation. So we’re going to fit our own Bayesian model in Stan using the more flexible rstanarm package.

We want to include information about the day of the week (Mon-Sun), the day of the year (0-364), and the date, as well as a binary variable indicating whether a date occurs during ceasefire.

library(lubridate)

daily$weekday <- factor(weekdays(daily$CrimeDate),
                          levels=c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))

daily$day.of.year <- yday(daily$CrimeDate)

# the julian calendar is a simple system for numeric dates
daily$jul <- julian(daily$CrimeDate)
daily$julz <- scale(daily$jul)[,1]

daily$ceasefire <- factor(ifelse(daily$CrimeDate %in% ceasefire.weekends, 1, 0),
                          labels=c("Regular Day","Ceasefire Weekend"))

Fit the model

We’ll predict shootings using a spline time trend, a cyclical spline for yearly seasonality, random effects for day of the week, and a binary indicator for the ceasefire. We use a Poisson link function because the outcome is a count and the mean is about the same as sd.

library(rstanarm)

m2 <- stan_gamm4(shootings ~ 
            s(julz) +
            s(day.of.year, 
             bs="cc") + #cyclical constraint 
            weekday + 
            ceasefire, 
           data=daily,
           cores=4,
           iter=500,
           family=poisson)

Here is a plot of the model against the observations:

Ceasefires are visible as six dramatic downward red spikes in 2017-2018. The results cohere well with prophet, but this model fits the data better in many ways. For example, we no longer see impossible predictions of less than zero shootings. Also, our use of rstanarm will allow us to analyze the model more carefully. Here’s how we created the above plot:

daily$Estimate <- apply(posterior_linpred(m2, transform=TRUE),
                        2, median)
# 80% posterior predictive interval for main plot
preds <- posterior_predict(m2, transform=TRUE)
daily$high <- apply(preds,2,function(x){quantile(x, prob=c(.9))})
daily$low <- apply(preds,2,function(x){quantile(x, prob=c(.1))})

daily %>% 
  ggplot(aes(x = CrimeDate, y = shootings)) +
  geom_point(alpha=.2) +
  geom_line(aes(y = Estimate), alpha=.5, color="red") +
  geom_ribbon(aes(ymin=low, ymax=high), alpha=.2) +
  scale_y_continuous(breaks=c(0,4,8,12)) +
  xlab("date") +
  theme_bw()

Plot model components

Here are the marginal seasonal and time trend effects, showing the components that make up the above time series:

Our model specification has slower-moving seasonal trends. Summers still show up as particularly bad.

Effect of Ceasefire

Finally, we can use this model to plot the effect of Ceasefires on shootings per day, after accounting for all these trends and seasonal effects:

On an average day in Baltimore, at least three people get shot. But Ceasefire weekends are different. After accounting for time trends and seasonal effects, Ceasefires appear to prevent about two shootings every day.

#----- code to produce final decomposition figures ------
p <- purrr::map(
       c("day.of.year [all]", "weekday", "julz [all]", "ceasefire"),
       ~ ggeffects::ggpredict(m2, .x, ci.lvl= .95) %>% plot() + theme_bw()
     )


doy.axis.dates <- seq(as.Date("0-01-01"),by="month",length.out=13)
p[[1]] <- p[[1]] + 
  xlab("Day of year") + 
  ggtitle(" ") +
  scale_x_continuous(
    breaks=yday(doy.axis.dates),
    labels=date_format("%b")(doy.axis.dates)
  )

p[[2]] <- p[[2]] + xlab("Day of week") + ggtitle(" ")

trend.axis.dates <- seq(from=as.Date("2012-01-01"),
                      by="year",
                      length.out=9)
p[[3]] <- p[[3]] + 
  xlab("Time trend") + 
  ggtitle(" ") +
  scale_x_continuous(
    breaks=scale(julian(trend.axis.dates),
                 center=mean(daily$jul),
                 scale=sd(daily$jul)),
    labels=date_format("%m-%Y")(trend.axis.dates))

library(gridExtra)
grid.arrange(p[[3]],p[[2]],p[[1]])

p[[4]] +  # ceasefires
  xlab("") + 
  ggtitle("Predicted counts of shootings",
          subtitle="with 95% credible intervals")